from lxml import etree

# Load the document once; every XPath query below runs against this tree.
tree = etree.parse("test.html")

# XPath positions are 1-based: a [number] predicate is an index, e.g.
#   tree.xpath('/html/body/ul/li[1]/a/text()')
#
# An [@attr="value"] predicate filters nodes by attribute, e.g.
#   tree.xpath('/html/body/ol/li/a[@href="dapao"]/text()')

ol_li_list = tree.xpath('/html/body/ol/li')
for li in ol_li_list:
    # Paths starting with "./" are relative: the search continues inside
    # the current <li> rather than from the document root.
    name = li.xpath('./a/text()')
    # "@attr" selects the attribute's content instead of the element.
    href = li.xpath('./a/@href')
    # One line per result: the link text first, then the href values.
    print(name, href, sep="\n")

# Collect the href of every link under the <ul> in a single absolute query.
ul_hrefs = tree.xpath("/html/body/ul/li/a/@href")
print(ul_hrefs)



